package us.codecraft.webmagic.processor.example;

import org.apache.commons.collections.CollectionUtils;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.JsonPathSelector;

import java.util.List;

/**
 * Example {@link PageProcessor} that reads the JSON returned by the youxinpai
 * "secUpdateList" endpoint and prints each publish ID found in it.
 *
 * <p>Pages whose URL does not match {@link #URLRULE} are treated as JSON documents
 * whose {@code $.data} node is stored in the result fields {@code title} and
 * {@code content}.
 */
public class UxinPageProcessor implements PageProcessor {
    /** Crawl settings: 3 retries, sleep time 10000 and timeout 10000 (units as defined by WebMagic's Site). */
    private final Site site = Site.me().setRetryTimes(3).setSleepTime(10000).setTimeOut(10000);

    //http://www.youxinpai.com/halfMinUpdateList
    /** Regular expression identifying the list endpoint handled by the first branch of {@link #process(Page)}. */
    private static final String URLRULE = "http://www\\.youxinpai\\.com/secUpdateList";

    /**
     * Prefix for follow-up requests. Only referenced by the disabled
     * {@code addTargetRequest} call in {@link #process(Page)}.
     * NOTE(review): this points at angularjs.cn, which looks like a leftover from another
     * example - confirm the intended detail-page prefix before re-enabling follow-up requests.
     */
    private static final String firstUrl = "http://angularjs.cn/api/article/";

    /**
     * Handles one downloaded page.
     *
     * <p>For the list endpoint, extracts every {@code publishID} from
     * {@code $.data.auctionPublishAndStatusList} and prints it. For any other URL, stores the
     * {@code $.data} node under both the {@code title} and {@code content} result fields.
     *
     * @param page the downloaded page; its raw text is parsed as JSON
     */
    @Override
    public void process(Page page) {
        if (page.getUrl().regex(URLRULE).match()) {
            // Use JSONPath to pull the publish IDs out of the JSON payload; these IDs are meant
            // to be appended to a URL prefix to build the links to crawl next.
            List<String> publishIds = new JsonPathSelector("$.data.auctionPublishAndStatusList[*].publishID")
                    .selectList(page.getRawText());
            if (CollectionUtils.isNotEmpty(publishIds)) {
                for (String publishId : publishIds) {
                    // Print the current ID (previously the whole list was printed once per element).
                    System.out.println(publishId);
                    // Follow-up requests are intentionally disabled:
                    //page.addTargetRequest(firstUrl + publishId);
                }
            }
        } else {
            // Use JSONPath to extract the payload from the crawled JSON. Both fields read the same
            // "$.data" node, so evaluate it once and reuse the result.
            String data = new JsonPathSelector("$.data").select(page.getRawText());
            page.putField("title", data);
            page.putField("content", data);
        }
    }

    /**
     * Returns the crawl settings used by this processor.
     *
     * @return the site configuration created when this processor was constructed
     */
    @Override
    public Site getSite() {
        return site;
    }

    /**
     * Runs the crawler against the list endpoint ten times in a row, each time with a fresh
     * spider using 5 threads.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        //http://www.youxinpai.com/home/trade/detail/3358327/f1aa6e
        //http://www.youxinpai.com/trade
        for (int i = 1; i <= 10; i++) {
            Spider.create(new UxinPageProcessor()).addUrl("http://www.youxinpai.com/secUpdateList").thread(5).run();
        }
    }
}
